#### ======================================================================
#### Online Appendix: Arrow plots
#### Current vs. desired relationships with China and the United States
#### ----------------------------------------------------------------------
#### Data source:
####   data/complete_aggregated_results_with_subgroups.csv
####
#### Design choices:
####   - Arrows reflect WEIGHTED country-level estimates (Method == "weighted",
####     Subgroup == "all").
####   - Right-margin "n = ..." annotations use UNWEIGHTED respondent counts
####     (Method == "unweighted", Subgroup == "all").
####   - Countries are ordered by |Delta| (absolute gap) in ascending order.
####   - A horizontal divider marks the point below which the difference is
####     not statistically significant (p >= 0.05) using an approximate test
####     computed from aggregated means, SDs, and unweighted N.
####
#### Output:
####   output/arrow-plot-china-6x10.pdf
####   output/arrow-plot-us-6x10.pdf
#### ======================================================================

library(dplyr)
library(ggplot2)
library(readr)
library(grid)   # unit(), arrow()

#### ----------------------------------------------------------------------
#### Paths and output directory
#### ----------------------------------------------------------------------

# CSV of aggregated survey results that the rest of the script reads.
in_path <- "data/complete_aggregated_results_with_subgroups.csv"
# Directory that receives the PDF figures saved at the end of the script.
out_dir <- "output"
# Create the output directory (including missing parents). showWarnings = FALSE
# keeps this quiet when the directory already exists.
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

#### ----------------------------------------------------------------------
#### Plot styling
#### ----------------------------------------------------------------------

# Font family used for the theme, the in-plot text annotations, and the PDF
# device; base_size / axis_text_size are the theme and axis-label sizes.
base_family    <- "Calibri"
base_size      <- 13
axis_text_size <- 11

# ggplot2 uses millimeters internally for text sizing; converting pt -> mm
# so that geom_text()/geom_label() annotations match the axis text size.
# NOTE(review): ggplot2 already exports an equivalent `.pt` constant; this
# assignment masks it in the global environment with a rounded copy.
.pt        <- 2.845276
annot_size <- axis_text_size / .pt

#### ----------------------------------------------------------------------
#### Countries included
#### ----------------------------------------------------------------------

# Display names of the countries kept for plotting. Names must match the
# harmonized spellings produced when the data are loaded (e.g. "DRC",
# "Türkiye", "Philippines"); rows for any other country are filtered out.
countries_to_include <- c(
  "Argentina", "Belarus", "Bolivia", "Brazil", "Burkina Faso",
  "Cameroon", "Colombia", "Croatia", "DRC", "Egypt", "Georgia",
  "Ghana", "Haiti", "Hungary", "Indonesia", "Kenya", "Lebanon",
  "Madagascar", "Mali", "Mexico", "Nigeria", "Peru", "Philippines",
  "Poland", "Romania", "Serbia", "Tunisia", "Türkiye", "Uruguay",
  "Venezuela"
)

#### ----------------------------------------------------------------------
#### Load data and harmonize country names
#### ----------------------------------------------------------------------

# Read the aggregated results, rename the three countries whose source
# spelling differs from the display name, and keep only full-sample
# ("all") rows for the selected countries.
complete_results <- in_path %>%
  read_csv(show_col_types = FALSE) %>%
  mutate(
    Country = recode(
      Country,
      "Democratic Republic of Congo" = "DRC",
      "Turkey" = "Türkiye",
      "the Philippines" = "Philippines"
    )
  ) %>%
  filter(
    Subgroup == "all" &
      Country %in% countries_to_include &
      !is.na(Country) &
      Country != ""
  )

#### ----------------------------------------------------------------------
#### Weighted estimates (arrows) + unweighted counts (n labels)
#### ----------------------------------------------------------------------

# Weighted rows supply the means, SDs and deltas that the arrows are drawn
# from (one set of columns per actor).
weighted_all <- complete_results %>%
  filter(Method == "weighted") %>%
  select(
    all_of(c(
      "Country", "ISO3_code",
      "China_Mean_Current", "China_SD_Current",
      "China_Mean_Desired", "China_SD_Desired", "China_Delta",
      "USA_Mean_Current", "USA_SD_Current",
      "USA_Mean_Desired", "USA_SD_Desired", "USA_Delta"
    ))
  )

# Unweighted rows supply only the respondent count shown as "n = ...".
# Non-numeric N values become NA instead of raising a warning.
unweighted_n <- complete_results %>%
  filter(Method == "unweighted") %>%
  mutate(N_unweighted = suppressWarnings(as.numeric(N))) %>%
  select(Country, ISO3_code, N_unweighted)

# Attach the unweighted counts to the weighted estimates, country by country.
plot_base <- left_join(
  weighted_all,
  unweighted_n,
  by = c("Country", "ISO3_code")
)

#### ----------------------------------------------------------------------
#### Helper: approximate two-sided p-value from aggregated summaries
#### ----------------------------------------------------------------------
#### This is used only to place the "Difference not significant below" divider.
#### It treats current and desired means as independent estimates with
#### standard errors computed from SD/sqrt(n).
#### ----------------------------------------------------------------------

p_from_summary <- function(delta, sd_current, sd_desired, n) {
  # Approximate two-sided p-value for each element of `delta`.
  #
  # Args (all vectors, coerced to numeric; non-numeric entries become NA):
  #   delta       difference between the two means
  #   sd_current  standard deviation of the "current" measure
  #   sd_desired  standard deviation of the "desired" measure
  #   n           number of observations behind each mean
  #
  # Returns a numeric vector the length of `delta`. Entries are NA where
  # any input is non-finite or n < 2.
  num <- lapply(
    list(delta = delta, sd_current = sd_current, sd_desired = sd_desired, n = n),
    function(v) suppressWarnings(as.numeric(v))
  )

  # A row is usable only if every input is finite and n is at least 2.
  usable <- Reduce(`&`, lapply(num, is.finite)) & (num$n >= 2)

  # Standard error of the difference, treating the two means as independent.
  std_err <- sqrt((num$sd_current^2 / num$n) + (num$sd_desired^2 / num$n))
  t_stat  <- num$delta / std_err

  p_values <- rep(NA_real_, length(num$delta))
  p_values[usable] <- 2 * stats::pt(
    -abs(t_stat[usable]),
    df = pmax(num$n[usable] - 1, 1)
  )
  p_values
}

#### ----------------------------------------------------------------------
#### Data preparation for a given actor (China or USA)
#### ----------------------------------------------------------------------
#### Returns:
####   - data: country rows ordered by |Delta| with an inserted spacer row
####   - divider_index: y-position at which significance switches
#### ----------------------------------------------------------------------

prep_actor <- function(df, actor = c("China", "USA")) {
  # Build the plotting table for one actor.
  #
  # Args:
  #   df     data frame with Country, ISO3_code, N_unweighted and the
  #          "<actor>_Mean_Current", "<actor>_Mean_Desired",
  #          "<actor>_SD_Current", "<actor>_SD_Desired", "<actor>_Delta"
  #          columns.
  #   actor  "China" or "USA"; selects which column prefix is used.
  #
  # Returns a list with:
  #   data           rows sorted by ascending |Delta| with a `Position`
  #                  column; when a divider exists, an empty spacer row
  #                  (Country == "") is inserted at the divider position.
  #   divider_index  Position of the spacer row, or NA when no divider can
  #                  be drawn (no significant row, or the first row in plot
  #                  order is already significant).
  actor <- match.arg(actor)

  suffixes <- c("Mean_Current", "Mean_Desired", "SD_Current", "SD_Desired", "Delta")
  required <- c("Country", "ISO3_code", "N_unweighted", paste0(actor, "_", suffixes))
  missing_cols <- setdiff(required, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "prep_actor(): missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Fetch "<actor>_<suffix>" as numeric; unparseable values become NA.
  actor_col <- function(suffix) {
    suppressWarnings(as.numeric(df[[paste0(actor, "_", suffix)]]))
  }

  # Plot order: ascending |Delta| (arrange() places NA deltas last).
  out_sorted <- df %>%
    transmute(
      Country,
      ISO3_code,
      N_unweighted,
      Mean_Current = actor_col("Mean_Current"),
      Mean_Desired = actor_col("Mean_Desired"),
      SD_Current   = actor_col("SD_Current"),
      SD_Desired   = actor_col("SD_Desired"),
      Delta        = actor_col("Delta")
    ) %>%
    mutate(
      Abs_Delta   = abs(Delta),
      P_Value     = p_from_summary(Delta, SD_Current, SD_Desired, N_unweighted),
      Significant = !is.na(P_Value) & (P_Value < 0.05)
    ) %>%
    arrange(Abs_Delta) %>%
    mutate(Position = row_number())

  # The divider must be located in the SAME order the rows are plotted in.
  # Counting non-significant rows only matches that order when significance
  # is monotone in |Delta|, which differing N/SD per country (or NA deltas,
  # sorted last) can break. Placing the divider at the first significant row
  # in plot order guarantees every row below it is non-significant.
  first_sig <- match(TRUE, out_sorted$Significant)

  if (!is.na(first_sig)) {
    n_stray <- sum(!out_sorted$Significant[first_sig:nrow(out_sorted)])
    if (n_stray > 0) {
      warning(
        sprintf(
          paste0(
            "%s: %d non-significant row(s) rank above the first significant ",
            "row when ordered by |Delta|; the divider only guarantees ",
            "non-significance below it."
          ),
          actor, n_stray
        ),
        call. = FALSE
      )
    }
  }

  # No divider when nothing is significant or when nothing would lie below it.
  divider_index <- if (is.na(first_sig) || first_sig <= 1L) {
    NA_integer_
  } else {
    first_sig
  }

  # Insert spacer row so the divider sits on an empty line. bind_rows() fills
  # the spacer's remaining columns with NA.
  if (!is.na(divider_index)) {
    spacer <- data.frame(Country = "", ISO3_code = "", stringsAsFactors = FALSE)

    out_sorted <- bind_rows(
      out_sorted[seq_len(divider_index - 1L), ],
      spacer,
      out_sorted[seq.int(divider_index, nrow(out_sorted)), ]
    ) %>%
      mutate(Position = row_number())
  }

  list(data = out_sorted, divider_index = divider_index)
}

#### ----------------------------------------------------------------------
#### Plot constructor
#### ----------------------------------------------------------------------

make_arrow_plot <- function(dat, divider_index, x_label) {
  # Draw one arrow per country from Mean_Current to Mean_Desired.
  #
  # Args:
  #   dat            data frame with Country, Position, Mean_Current,
  #                  Mean_Desired and N_unweighted; a row with Country == ""
  #                  is treated as a spacer and gets no "n = ..." label.
  #   divider_index  y-position of the significance divider, or NA to omit it.
  #   x_label        x-axis title.
  #
  # Reads the script-level styling values base_family, base_size,
  # axis_text_size and annot_size. Returns a ggplot object.
  annotations_df <- dat %>%
    filter(Country != "") %>%
    mutate(n_label = ifelse(is.na(N_unweighted), "", paste0("n = ", round(N_unweighted))))

  p <- ggplot(dat) +
    geom_segment(
      aes(
        x    = Mean_Current,
        xend = Mean_Desired,
        y    = Position,
        yend = Position
      ),
      linewidth = 0.4,
      colour    = "black",
      arrow     = arrow(type = "closed", length = unit(0.06, "inches"))
    ) +
    # White-out area to the right of 10 to keep the margin clean for n-labels.
    annotate(
      "rect",
      xmin = 10, xmax = Inf,
      ymin = -Inf, ymax = Inf,
      fill = "white",
      color = NA
    ) +
    # Right-aligned respondent counts inside the whited-out margin.
    geom_text(
      data   = annotations_df,
      aes(x = 10.6, y = Position, label = n_label),
      hjust  = 1,
      vjust  = 0.5,
      family = base_family,
      size   = annot_size,
      colour = "black"
    ) +
    # One tick per row, labelled with the country name ("" for the spacer).
    scale_y_continuous(
      breaks = seq_len(nrow(dat)),
      labels = dat$Country
    ) +
    # The axis runs to 11 so the n-labels fit; only 1-10 get tick marks.
    scale_x_continuous(
      breaks = 1:10,
      limits = c(1, 11),
      expand = c(0, 0)
    ) +
    labs(x = x_label, y = "") +
    theme_minimal(base_family = base_family, base_size = base_size) +
    theme(
      axis.text.x        = element_text(size = axis_text_size, colour = "black"),
      axis.text.y        = element_text(size = axis_text_size, colour = "black"),
      plot.margin        = margin(r = 40),
      panel.grid.minor.y = element_blank()
    )

  # Add significance divider if identifiable.
  if (!is.na(divider_index)) {
    p <- p +
      # annotate() draws the line exactly once. A geom_segment() with constant
      # aesthetics and no `data` inherits the plot data and is drawn once per
      # row, so the stacked copies defeat alpha = 0.6.
      annotate(
        "segment",
        x = 1.25, xend = 11,
        y = divider_index, yend = divider_index,
        colour    = "black",
        alpha     = 0.6,
        linewidth = 0.4
      ) +
      geom_label(
        inherit.aes = FALSE,
        data = data.frame(
          x = 1,
          y = divider_index,
          label = "Difference not significant below \u2193"
        ),
        aes(x = x, y = y, label = label),
        hjust         = 0,
        vjust         = 0.5,
        family        = base_family,
        size          = annot_size,
        label.size    = 0,
        fill          = "white",
        label.padding = unit(3, "pt")
      )
  }

  p
}

#### ----------------------------------------------------------------------
#### China plot
#### ----------------------------------------------------------------------

# Prepare the China rows, build the figure, show it on the active device,
# and save it as a Cairo PDF using the shared font family.
china_obj <- prep_actor(plot_base, actor = "China")

arrow_plot_china <- make_arrow_plot(
  dat           = china_obj$data,
  divider_index = china_obj$divider_index,
  x_label       = "Attitude Toward China (1 = 'Not close at all', 10 = 'Very close')"
)

print(arrow_plot_china)

ggsave(
  file.path(out_dir, "arrow-plot-china-6x10.pdf"),
  arrow_plot_china,
  device = function(...) grDevices::cairo_pdf(..., family = base_family),
  width  = 950 / 96,
  height = 650 / 96,
  units  = "in",
  dpi    = 300
)

#### ----------------------------------------------------------------------
#### United States plot
#### ----------------------------------------------------------------------

# Prepare the United States rows, build the figure, show it on the active
# device, and save it as a Cairo PDF using the shared font family.
usa_obj <- prep_actor(plot_base, actor = "USA")

arrow_plot_us <- make_arrow_plot(
  dat           = usa_obj$data,
  divider_index = usa_obj$divider_index,
  x_label       = "Attitude Toward the United States (1 = 'Not close at all', 10 = 'Very close')"
)

print(arrow_plot_us)

ggsave(
  file.path(out_dir, "arrow-plot-us-6x10.pdf"),
  arrow_plot_us,
  device = function(...) grDevices::cairo_pdf(..., family = base_family),
  width  = 950 / 96,
  height = 650 / 96,
  units  = "in",
  dpi    = 300
)